###################################################
### Project: Issue Competition in Parliamentary ###
###          Speeches?                          ###
### Title:   Preparation for Analysis -         ###
###          Alternative                        ###
### Author:  Christoph Ivanusch                 ###
###################################################

# preparation

## start from a clean state
## NOTE: run this script in a fresh R session (e.g. via `Rscript` or after
## "Restart R") instead of calling rm(list = ls()). That call silently deletes
## every object in the caller's workspace when the script is sourced, yet it
## neither detaches packages nor resets options, so it does not provide a
## clean session anyway. Every object used below is created by this script.

## load packages
library(dplyr) #for data wrangling
library(tidyr) #for data wrangling

# load and prepare speech data ("Classification_Data.RDS")
data <- readRDS(file = "Classification_Data.RDS")

## collapse both labels used for the party PILZ into the single name "PILZ"
data$party <- gsub(
  "JETZT", "PILZ",
  gsub("Jetzt – Liste PILZ", "PILZ", data$party)
)

## keep only speeches from MPs of the parties of interest
## (drops the parties "LIF" and "OK" as well as independents)
data <- data %>%
  filter(party != "LIF", party != "OK", party != "independent")

## remove speeches from parties that were founded during a legislative period
## (gp) and held only few speeches in that period:
## "BZÖ" in gp 22 and "STRONACH" in gp 24
##
## The rows are dropped with a logical mask instead of `data[-idx, ]`: if no
## row matches, `which()` returns integer(0) and `data[-integer(0), ]` would
## select zero rows, i.e. silently delete the whole data set. The mask keeps
## every row in that case. Rows where the comparison is NA are kept, exactly
## as with the index returned by `which()`.
bz_22_idx <- which(data$party == "BZÖ" & data$gp == 22)
data <- data[!(seq_len(nrow(data)) %in% bz_22_idx), ]

## indices are recomputed because the row positions changed above
ts_24_idx <- which(data$party == "STRONACH" & data$gp == 24)
data <- data[!(seq_len(nrow(data)) %in% ts_24_idx), ]

## filter out sentences classified as "Greeting" (greeting sentences or direct
## addresses) and as "Parliament" (sentences which revolve around the topic of
## parliamentary procedures)
data <- data %>%
  filter(pred_smoothed != "Greeting", pred_smoothed != "Parliament")

# load additional data sets

## issue ownership for Austrian parties
IO_Austria <- readRDS(file = "IO_Austria.RDS")

## issue agenda in party manifestos
ManifestoSalience_Austria <- readRDS(file = "ManifestoSalience_Austria.RDS")

## number of seats and dummy variable for size of party
PartySize_Austria <- readRDS(file = "PartySize_Austria.RDS")

## party position in government or opposition
GovOpp_Austria <- readRDS(file = "GovOpp_Austria.RDS")

# create data frame to work with

## count sentences for each issue per party and legislative period (gp);
## the predicted label `pred_smoothed` is stored under the name `issue`
Salience <- setNames(
  count(data, party, gp, pred_smoothed),
  c("party", "gp", "issue", "n")
)

## get number of sentences for all speeches per party per gp
## (ungroup() removes the grouping by `party` that summarise() would otherwise
## leave on the result, so no grouping lingers on the helper table)
N_sent <- Salience %>%
  group_by(party, gp) %>%
  summarise(n_all_sent = sum(n)) %>%
  ungroup()

## get issue attention in percent per party and legislative period (gp)
Salience <- left_join(Salience, N_sent, by = c("party", "gp"))

## share of a party's sentences in a gp that deal with the issue
Salience$speech_percent <- (Salience$n / Salience$n_all_sent) * 100

## calculate mean in issue attention across all parties for each legislative
## period (gp); the column keeps its existing name `speech_mean_per_year`
## although the grouping unit is the gp, not the calendar year
## NOTE(review): only party/gp/issue combinations that have a row in
## `Salience` enter the mean; a party without any sentence on an issue in a gp
## presumably has no row and thus does not count as 0 -- confirm this is
## intended.
## (ungroup() removes the grouping by `gp` that summarise() would otherwise
## leave on the result)
df_mean <- Salience %>%
  group_by(gp, issue) %>%
  summarise(speech_mean_per_year = mean(speech_percent)) %>%
  ungroup()

Salience <- left_join(Salience, df_mean, by = c("gp", "issue"))

## calculate difference from mean in issue attention for each party and gp
Salience$speech_diff_to_mean <- Salience$speech_percent - Salience$speech_mean_per_year

# create final data

## prepare: drop the raw sentence counts and store the legislative period (gp)
## as a number (conversion goes through character)
df <- Salience %>%
  select(-n, -n_all_sent) %>%
  mutate(gp = as.numeric(as.character(gp)))

## match with additional data

### match with IO_Austria (Issue Ownership for Austrian parties)
#### reshape to long format: every column except `issue` becomes a value of
#### `party`, its cell values go to `IO`
df_IO <- IO_Austria %>%
  pivot_longer(cols = -issue, names_to = "party", values_to = "IO")
df <- df %>%
  left_join(df_IO, by = c("party", "issue"))

### match with ManifestoSalience_Austria (Issue Agenda in party manifestos)
#### use the difference to the mean (manifesto_diff_to_mean) instead of the
#### difference to the median (manifesto_diff_to_median)
#### (ungroup() removes the grouping by `year` that summarise() would
#### otherwise leave on the result)
df_manifesto_mean <- ManifestoSalience_Austria %>%
  group_by(year, issue) %>%
  summarise(manifesto_mean_per_year = mean(manifesto_percent)) %>%
  ungroup()
ManifestoSalience_Austria <- left_join(ManifestoSalience_Austria, df_manifesto_mean,
                                       by = c("year", "issue"))
ManifestoSalience_Austria$manifesto_diff_to_mean <- ManifestoSalience_Austria$manifesto_percent - ManifestoSalience_Austria$manifesto_mean_per_year

#### NOTE(review): the mean is computed per `year`, the join below uses `gp`;
#### this assumes one manifesto row per party, gp and issue -- TODO confirm,
#### otherwise the join duplicates rows of `df`
df <- left_join(df, ManifestoSalience_Austria, by = c("party", "gp", "issue"))
df$year <- NULL

### match with PartySize_Austria (number of seats and dummy variable for the
### size of a party)
#### a party counts as large if it holds more than 1/4 of all seats
#### ( >= 46 seats)
df <- df %>%
  left_join(PartySize_Austria, by = c("party", "gp"))

### match with GovOpp_Austria (information on party position in government or opposition)
#### opposition party --> 0; government for whole period --> 1; government for part of period --> 0.5
df <- left_join(df, GovOpp_Austria, by = c("party", "gp"))
#### convert via character (same idiom as used for `gp` above): as.numeric() on
#### a factor would return the internal level codes (1, 2, 3) instead of the
#### coded values (0, 0.5, 1); for character or numeric input the result is
#### unchanged
df$government <- as.numeric(as.character(df$government))

# write the final data set to disk for the analysis
saveRDS(object = df, file = "Analysis_Data_Alternative.RDS")
